*******************************************************

/*  The Impact of the US-China Trade War on Vietnam's Labor Market
 Pham Phuong Ngoc and Dainn Wie
 
 This is do file to construct tariff different variables and combine with LFS data.

Inputs:

1. HS2012.dta 
2. import_data.dta 
3. China_target_hs8.dta 
4. China_target_hs10.dta 
5. LFS.dta 
6. Vietnamtoworld.dta 

*/

clear all
capture log close
set more off

cd "C:\Replication"

********************************************************************************
*** PART 1: Construct tariff change in 2018 and 2019 
********************************************************************************

* Open the trade import value data of US from all countries
use "Data\import_data.dta", clear

rename m_val0 cifvalue_world
rename m_val1 cifvalue_china
drop if (cifvalue_world==. & cifvalue_china==.)

keep if year==2015

* Convert HS 6 digit code to ISIC 
merge m:1 hs6 using "Data\HS2012.dta", keep (match master) keepusing (isic3) generate (merge1)

replace isic3 =. if isic3==1| isic3==3 | isic3==26 | isic3==27 | merge1==1
count if isic3==.

drop merge1

preserve
egen totalvalue = sum (cifvalue_world)
egen value = sum (cifvalue_world) if isic3==.
generate a = value/totalvalue
sum a

egen totalvalue1 = sum(cifvalue_china)
egen value1 = sum (cifvalue_china) if isic3==.
generate b = value1/totalvalue1
sum b
restore

* I will drop the product code which cannot convert to ISIC 3 digit code 
drop if isic3==.

* generate variable for total value of US import in each industry code
egen value = sum (cifvalue_world), by (isic3)

* generate import value weight of each 10-digit HS code within 3 digit industries using China export data
generate weight=cifvalue_china/value
label variable weight "Weight is determined by china import in each hs over total US import of industry"

* merge to get the target products in US - China trade war
merge m:1 hs8 using "Data\China_target_hs8.dta", keep (match master) generate (merge2)
merge m:1 hs10 using "Data\China_target_hs10.dta", keep (match master) generate (merge3)
replace heading=heading1 if merge3==3
drop heading1

* generate the average US import tariff difference between Vietnam and China in 2018 and 2019
generate deta_tariff2018=0
replace deta_tariff2018=((25*0.25/31 + 0.25*5)/12) if heading==99038801
replace deta_tariff2018=((8*0.25/31 + 0.25*4)/12) if heading==99038802
replace deta_tariff2018=((6*0.10/30 + 0.10*3)/12) if heading==99038803 | heading==99038804
tab deta_tariff2018
sum deta_tariff2018 if deta_tariff2018!=0

generate deta_tariff2019=0
replace deta_tariff2019=0.25 if heading==99038801 | heading==99038802
replace deta_tariff2019=((0.10*4+0.10*10/31+21*0.25/31+0.25*7)/12) if heading==99038803| heading==99038804
replace deta_tariff2019=(0.15*4/12) if heading==99038815
tab deta_tariff2019
sum deta_tariff2019 if deta_tariff2019!=0

* interact the US import tariff difference with the weights of china import in each hs over total US import of industry
generate wt_tariff2018 = deta_tariff2018 * weight
generate wt_tariff2019 = deta_tariff2019 * weight

* collapse to get average US import tariff difference between Vietnam and China in 2018 and 2019 in each 3-digit ISIC industry code
collapse (sum) wt_tariff2018 wt_tariff2019, by(isic3) 

sum wt_tariff2018 wt_tariff2019

generate wt_tariff2017=0
generate wt_tariff2016=0 

*generate a variable indicates tariff difference, to get the placebo results
generate tariff_placebo2014 = 0
generate tariff_placebo2015 = 0
generate tariff_placebo2016 = wt_tariff2018
generate tariff_placebo2017 = wt_tariff2019

* reshape the dataset
reshape long wt_tariff tariff_placebo, i(isic3) j(year)
label variable wt_tariff "The tariff difference in each 3 digit industry using weights of china import in each hs over total US import of industry"
label variable tariff_placebo "Placebo analysis: The tariff difference in each 3 digit industry using weights of china import in each hs over total US import of industry"

replace wt_tariff=0 if wt_tariff==.
replace tariff_placebo=0 if tariff_placebo==.

sort year
by year: sum wt_tariff
by year: sum tariff_placebo

* save the dataset
save "Data\tariff_difference.dta", replace


********************************************************************************
*** PART 2: Classifying Trade Sectors
********************************************************************************

// Trade sectors are the industries which Vietnam export to foreign countries - I will used Trade data of vietnam in 2015 to classifying //

* Open dataset
use "Data\Vietnamtoworld.dta", clear

* rename the variable
keep hs6 vietnam_export

* convert hs6 to isic 3 digit
merge m:1 hs6 using "Data\HS2012.dta", keep (match master) keepusing (isic3) generate (merge1)
drop if merge1==1
// for unspecific products //

* collapse to get total trade value by industry
collapse (sum) vietnam_export, by(isic3)

* generate trading variables
gen trading = 1
 
* save dataset
save "Data\tradingsector.dta", replace

********************************************************************************
*** PART 3: Get the master data
********************************************************************************

* Open LFS dataset
use "Data\LFS.dta", clear

* keep individuals who are above 20 years old 
keep if (age>=20 & age<=64) 

* Foodnote 1: unemployment rate
preserve
keep if year ==2019
tab employment_status
restore 

* keep individuals who worked
keep if employment_status==1

* convert VSIC to ISIC 
// LFS 2016 - 2018 using VSIC 2007 while LFS 2019 using VSIC 2018 //
generate isic3=int(industry/10)
replace isic3=72 if isic3==73
replace isic3=139 if isic3==132 & year!=2019
replace isic3=492 if isic3==493
replace isic3=493 if isic3==494
replace isic3=639 if isic3==632 & year!=2019
replace isic3=799 if isic3==792 & year!=2019
replace isic3=851 if isic3==852 
replace isic3=852 if isic3==853 & year!=2019
replace isic3=853 if isic3==854
replace isic3=854 if isic3==855
replace isic3=855 if isic3==856
replace isic3=960 if isic3==961 | isic3==962| isic3==963
label variable isic3 "ISIC code 3 digit for industry"

* correct observations with wrong isic3 code (still used VSIC 2007 in LFS 2019, for instance)
replace isic3=139 if isic3==132
replace isic3=370 if isic3==371 | isic3==372
replace isic3=639 if isic3==632
replace isic3=799 if isic3==792

* drop observations with wrong isic3 code (the code that does not belong to ISIC)
drop if isic3==18| isic3==30 | isic3==130| isic3==140| isic3==160| isic3==180| isic3==193| isic3==220| isic3==230| isic3==250| isic3==420 | isic3==425| isic3==430| isic3==456| isic3==460| isic3==470| isic3==515| isic3==520| isic3==525| isic3==610| isic3==615| isic3==628| isic3==710 | isic3==730| isic3==810| isic3==814| isic3==820| isic3==870

generate isic2 = int(isic3/10)
label variable isic2 "ISIC code 2 digit for industry"

generate isic1 = int(isic3/100)
label variable isic1 "ISIC code 1 digit for industry"

generate isic_name="."
replace isic_name ="Agriculture" if isic2<=3
replace isic_name ="Mining" if isic2>=5 & isic2<=9
replace isic_name ="Manufacturing" if isic2>=10 & isic2<=33
replace isic_name ="Electricity, gas, water supply" if isic2>=35 & isic2<=39
replace isic_name ="Construction" if isic2>=41 & isic2<=43
replace isic_name ="Wholesale - retail trade" if isic2>=45 & isic2<=47
replace isic_name ="Transportation" if isic2>=49 & isic2<=53
replace isic_name ="Accommodation service" if isic2>=55 & isic2<=56
replace isic_name ="Information, communication" if isic2>=58 & isic2<=63
replace isic_name ="Financial and insurance" if isic2>=64 & isic2<=66
replace isic_name ="Real estate" if isic2==68
replace isic_name ="Professional and technical" if isic2>=69 & isic2<=75
replace isic_name ="Administrative service" if isic2>=77 & isic2<=82
replace isic_name ="Public administration" if isic2==84
replace isic_name ="Education" if isic2==85
replace isic_name ="Human health and social work" if isic2>=86 & isic2<=88
replace isic_name ="Arts, entertainment" if isic2>=90 & isic2<=93
replace isic_name ="Others" if isic2>=94

generate isic ="."
replace isic ="A" if isic2<=3
replace isic ="B" if isic2>=5 & isic2<=9
replace isic ="C" if isic2>=10 & isic2<=33
replace isic ="D" if isic2==35
replace isic ="E" if isic2>=36 & isic2<=39
replace isic ="F" if isic2>=41 & isic2<=43
replace isic ="G" if isic2>=45 & isic2<=47
replace isic ="H" if isic2>=49 & isic2<=53
replace isic ="I" if isic2>=55 & isic2<=56
replace isic ="J" if isic2>=58 & isic2<=63
replace isic ="K" if isic2>=64 & isic2<=66
replace isic ="L" if isic2==68
replace isic ="M" if isic2>=69 & isic2<=75
replace isic ="N" if isic2>=77 & isic2<=82
replace isic ="O" if isic2==84
replace isic ="P" if isic2==85
replace isic ="Q" if isic2>=86 & isic2<=88
replace isic ="R" if isic2>=90 & isic2<=93
replace isic ="S" if isic2>=94 & isic2<=96
replace isic ="T" if isic2>=97 & isic2<=98
replace isic ="U" if isic2>=99

* drop observations with a missing value for industry
drop if isic3==. 

* drop those report working but have actual working hours equal 0 or missing values
drop if main_workinghours==. | main_workinghours==0

* generate dummy variable for employees working in manufacturing sectors
generate manu =0
replace manu=1 if (isic3>=101 & isic3<=332)
label variable manu "Manufacturing Sectors"

* generate dummy variable for employees working in trading sectors
merge m:1 isic3 using "Data\tradingsector.dta", keep (match master) keepusing (trading) generate (merge3)
replace trading = 0 if trading ==.
label variable trading "Trading Sectors"
drop merge3

* generate dummy variable for employees working in non-agriculture sectors
generate noag=(isic3>32)
label variable noag "Non-agriculture Sectors"

* generate ln of income: using the usual working hours per week
generate income_hour = main_income / (main_hours * 4)
label variable income_hour "Employees income per hour in main job"

* generate ln of working hours per week in main jobs
generate lnhours=ln(main_hours)
label variable lnhours "Logarith of working hours per week in main jobs"

* add the yearly deflators to convert everything to 2016 dong
gen ycpi=1 if year==2016
replace ycpi= 148.407/ 153.63 if year==2017
replace ycpi= 148.407/ 159.07 if year==2018
replace ycpi=148.407/163.517 if year==2019
label variable ycpi "CPI: Multiply from base year to get to 2016"

gen rincome = (income_hour)*ycpi
label variable rincome "Real income in 2016 prices"

generate lnincome=ln(rincome)
label variable lnincome "Logarith of income per hour"

* check the total working hours in all jobs = working hours in main job + other jobs

gen a = workinghours - main_workinghours - other_workinghours
replace workinghours = main_workinghours + other_workinghours if a!=0 & a!=.
drop a

gen b = hours - main_hours - other_hours
replace hours = main_hours + other_hours if b!=0 & b!=.
drop b

* generate variable age square
generate agesquare = age * age
label variable agesquare "Square of age"

* generate variable for experience
generate exp = age - schoolyear - 6
replace exp=0 if exp<0
label variable exp "Number of experienced year"

generate expsquare = exp * exp /100
label variable expsquare "Square of experienced year, divided by 100"

* generate variables for informal workers
// Definition: 1. informal employees: those have no long term labor contract or compulsory social insurance; 0. Formal employees: those have long term labor contracts and compulsory social insurance //
generate informal=.
replace informal =0 if labor_contract<=3 & insurance==1 & type_insurance==1
replace informal=1 if (labor_contract>3 & labor_contract!=.)| insurance==0| type_insurance==2
count if informal ==.
label variable informal "Whether employee is informal or formal employment"
tab informal year

gen uninsured=.
replace uninsured=1 if insurance==0 
replace uninsured=0 if insurance==1 
label variable uninsured "Dummy variable indicate workers without insured"

* generate indicator for agriculture, forestry, and aquaculture
gen agri = (isic3<=32)

drop if education==.

* generate an indicator for having a second job
gen second = 0
replace second = 1 if other_workinghours!=0
label variable second "Whether individual has another job or not: 1 is has second job"

* merge data with tariff_difference
merge m:1 year isic3 using "Data\tariff_difference.dta", keep (match master) generate (merge3)
replace wt_tariff=0 if wt_tariff==.
replace tariff_placebo=0 if tariff_placebo==.

* generate interaction term between tariff difference and manu
generate tariff_manu = wt_tariff * manu
label variable tariff_manu "interaction term between tariff difference and Manufacturing"

* generate interaction term between tariff difference and informal workers
generate tariff_informal = wt_tariff * informal
label variable tariff_informal "interaction term between tariff difference and informal workers"

* generate interaction term between tariff difference and uninsured workers
generate tariff_uninsured = wt_tariff * uninsured
label variable tariff_uninsured "interaction term between tariff difference and uninsured workers"

* save file
save "Data\master.dta", replace

log close
